Python 3
import numpy as npimport pandas as pdimport matplotlib.pyplot as pltimport seaborn as snraw_data= pd.read_csv("vehicle-1.csv")raw_data.head()| compactness | circularity | distance_circularity | radius_ratio | pr.axis_aspect_ratio | max.length_aspect_ratio | scatter_ratio | elongatedness | pr.axis_rectangularity | max.length_rectangularity | scaled_variance | scaled_variance.1 | scaled_radius_of_gyration | scaled_radius_of_gyration.1 | skewness_about | skewness_about.1 | skewness_about.2 | hollows_ratio | class | |
|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|
| 0 | 95 | 48.0 | 83.0 | 178.0 | 72.0 | 10 | 162.0 | 42.0 | 20.0 | 159 | 176.0 | 379.0 | 184.0 | 70.0 | 6.0 | 16.0 | 187.0 | 197 | van |
| 1 | 91 | 41.0 | 84.0 | 141.0 | 57.0 | 9 | 149.0 | 45.0 | 19.0 | 143 | 170.0 | 330.0 | 158.0 | 72.0 | 9.0 | 14.0 | 189.0 | 199 | van |
| 2 | 104 | 50.0 | 106.0 | 209.0 | 66.0 | 10 | 207.0 | 32.0 | 23.0 | 158 | 223.0 | 635.0 | 220.0 | 73.0 | 14.0 | 9.0 | 188.0 | 196 | car |
| 3 | 93 | 41.0 | 82.0 | 159.0 | 63.0 | 9 | 144.0 | 46.0 | 19.0 | 143 | 160.0 | 309.0 | 127.0 | 63.0 | 6.0 | 10.0 | 199.0 | 207 | van |
| 4 | 85 | 44.0 | 70.0 | 205.0 | 103.0 | 52 | 149.0 | 45.0 | 19.0 | 144 | 241.0 | 325.0 | 188.0 | 127.0 | 9.0 | 11.0 | 180.0 | 183 | bus |
raw_data.corr()| compactness | circularity | distance_circularity | radius_ratio | pr.axis_aspect_ratio | max.length_aspect_ratio | scatter_ratio | elongatedness | pr.axis_rectangularity | max.length_rectangularity | scaled_variance | scaled_variance.1 | scaled_radius_of_gyration | scaled_radius_of_gyration.1 | skewness_about | skewness_about.1 | skewness_about.2 | hollows_ratio | |
|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|
| compactness | 1.000000 | 0.689786 | 0.791707 | 0.691081 | 0.091779 | 0.148249 | 0.812770 | -0.788736 | 0.814248 | 0.676143 | 0.764361 | 0.818674 | 0.585845 | -0.250603 | 0.236685 | 0.157670 | 0.298528 | 0.365552 |
| circularity | 0.689786 | 1.000000 | 0.797180 | 0.625051 | 0.154283 | 0.251407 | 0.858265 | -0.827246 | 0.856603 | 0.965729 | 0.806791 | 0.850863 | 0.935950 | 0.053080 | 0.144968 | -0.011869 | -0.106339 | 0.045652 |
| distance_circularity | 0.791707 | 0.797180 | 1.000000 | 0.771748 | 0.158684 | 0.264621 | 0.907949 | -0.913020 | 0.896273 | 0.775149 | 0.865710 | 0.890541 | 0.706950 | -0.227001 | 0.114665 | 0.266049 | 0.146027 | 0.333648 |
| radius_ratio | 0.691081 | 0.625051 | 0.771748 | 1.000000 | 0.665363 | 0.450486 | 0.738480 | -0.792946 | 0.712744 | 0.571083 | 0.798294 | 0.725598 | 0.541325 | -0.181520 | 0.049112 | 0.174469 | 0.382912 | 0.472339 |
| pr.axis_aspect_ratio | 0.091779 | 0.154283 | 0.158684 | 0.665363 | 1.000000 | 0.648861 | 0.103832 | -0.183492 | 0.079566 | 0.127322 | 0.273738 | 0.089750 | 0.122454 | 0.152860 | -0.058539 | -0.032180 | 0.240201 | 0.267760 |
| max.length_aspect_ratio | 0.148249 | 0.251407 | 0.264621 | 0.450486 | 0.648861 | 1.000000 | 0.165998 | -0.180053 | 0.161603 | 0.305943 | 0.319033 | 0.143745 | 0.189752 | 0.295638 | 0.015446 | 0.043491 | -0.026184 | 0.143919 |
| scatter_ratio | 0.812770 | 0.858265 | 0.907949 | 0.738480 | 0.103832 | 0.165998 | 1.000000 | -0.973504 | 0.992078 | 0.810017 | 0.951672 | 0.996328 | 0.800577 | -0.028006 | 0.074376 | 0.213512 | 0.005171 | 0.118504 |
| elongatedness | -0.788736 | -0.827246 | -0.913020 | -0.792946 | -0.183492 | -0.180053 | -0.973504 | 1.000000 | -0.950405 | -0.776150 | -0.938313 | -0.956488 | -0.766671 | 0.103535 | -0.052243 | -0.186027 | -0.114846 | -0.216769 |
| pr.axis_rectangularity | 0.814248 | 0.856603 | 0.896273 | 0.712744 | 0.079566 | 0.161603 | 0.992078 | -0.950405 | 1.000000 | 0.813135 | 0.938182 | 0.992316 | 0.798522 | -0.015711 | 0.083219 | 0.215200 | -0.019066 | 0.099481 |
| max.length_rectangularity | 0.676143 | 0.965729 | 0.775149 | 0.571083 | 0.127322 | 0.305943 | 0.810017 | -0.776150 | 0.813135 | 1.000000 | 0.746657 | 0.797485 | 0.866554 | 0.041283 | 0.136077 | 0.001660 | -0.104437 | 0.076770 |
| scaled_variance | 0.764361 | 0.806791 | 0.865710 | 0.798294 | 0.273738 | 0.319033 | 0.951672 | -0.938313 | 0.938182 | 0.746657 | 1.000000 | 0.949766 | 0.781016 | 0.112452 | 0.036165 | 0.196202 | 0.014434 | 0.086708 |
| scaled_variance.1 | 0.818674 | 0.850863 | 0.890541 | 0.725598 | 0.089750 | 0.143745 | 0.996328 | -0.956488 | 0.992316 | 0.797485 | 0.949766 | 1.000000 | 0.797318 | -0.016642 | 0.077288 | 0.202398 | 0.006648 | 0.103839 |
| scaled_radius_of_gyration | 0.585845 | 0.935950 | 0.706950 | 0.541325 | 0.122454 | 0.189752 | 0.800577 | -0.766671 | 0.798522 | 0.866554 | 0.781016 | 0.797318 | 1.000000 | 0.192245 | 0.166785 | -0.056067 | -0.225882 | -0.118597 |
| scaled_radius_of_gyration.1 | -0.250603 | 0.053080 | -0.227001 | -0.181520 | 0.152860 | 0.295638 | -0.028006 | 0.103535 | -0.015711 | 0.041283 | 0.112452 | -0.016642 | 0.192245 | 1.000000 | -0.088736 | -0.126686 | -0.752437 | -0.804793 |
| skewness_about | 0.236685 | 0.144968 | 0.114665 | 0.049112 | -0.058539 | 0.015446 | 0.074376 | -0.052243 | 0.083219 | 0.136077 | 0.036165 | 0.077288 | 0.166785 | -0.088736 | 1.000000 | -0.035154 | 0.115728 | 0.097293 |
| skewness_about.1 | 0.157670 | -0.011869 | 0.266049 | 0.174469 | -0.032180 | 0.043491 | 0.213512 | -0.186027 | 0.215200 | 0.001660 | 0.196202 | 0.202398 | -0.056067 | -0.126686 | -0.035154 | 1.000000 | 0.077460 | 0.205115 |
| skewness_about.2 | 0.298528 | -0.106339 | 0.146027 | 0.382912 | 0.240201 | -0.026184 | 0.005171 | -0.114846 | -0.019066 | -0.104437 | 0.014434 | 0.006648 | -0.225882 | -0.752437 | 0.115728 | 0.077460 | 1.000000 | 0.893869 |
| hollows_ratio | 0.365552 | 0.045652 | 0.333648 | 0.472339 | 0.267760 | 0.143919 | 0.118504 | -0.216769 | 0.099481 | 0.076770 | 0.086708 | 0.103839 | -0.118597 | -0.804793 | 0.097293 | 0.205115 | 0.893869 | 1.000000 |
raw_data.describe()| compactness | circularity | distance_circularity | radius_ratio | pr.axis_aspect_ratio | max.length_aspect_ratio | scatter_ratio | elongatedness | pr.axis_rectangularity | max.length_rectangularity | scaled_variance | scaled_variance.1 | scaled_radius_of_gyration | scaled_radius_of_gyration.1 | skewness_about | skewness_about.1 | skewness_about.2 | hollows_ratio | |
|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|
| count | 846.000000 | 841.000000 | 842.000000 | 840.000000 | 844.000000 | 846.000000 | 845.000000 | 845.000000 | 843.000000 | 846.000000 | 843.000000 | 844.000000 | 844.000000 | 842.000000 | 840.000000 | 845.000000 | 845.000000 | 846.000000 |
| mean | 93.678487 | 44.828775 | 82.110451 | 168.888095 | 61.678910 | 8.567376 | 168.901775 | 40.933728 | 20.582444 | 147.998818 | 188.631079 | 439.494076 | 174.709716 | 72.447743 | 6.364286 | 12.602367 | 188.919527 | 195.632388 |
| std | 8.234474 | 6.152172 | 15.778292 | 33.520198 | 7.891463 | 4.601217 | 33.214848 | 7.816186 | 2.592933 | 14.515652 | 31.411004 | 176.666903 | 32.584808 | 7.486190 | 4.920649 | 8.936081 | 6.155809 | 7.438797 |
| min | 73.000000 | 33.000000 | 40.000000 | 104.000000 | 47.000000 | 2.000000 | 112.000000 | 26.000000 | 17.000000 | 118.000000 | 130.000000 | 184.000000 | 109.000000 | 59.000000 | 0.000000 | 0.000000 | 176.000000 | 181.000000 |
| 25% | 87.000000 | 40.000000 | 70.000000 | 141.000000 | 57.000000 | 7.000000 | 147.000000 | 33.000000 | 19.000000 | 137.000000 | 167.000000 | 318.000000 | 149.000000 | 67.000000 | 2.000000 | 5.000000 | 184.000000 | 190.250000 |
| 50% | 93.000000 | 44.000000 | 80.000000 | 167.000000 | 61.000000 | 8.000000 | 157.000000 | 43.000000 | 20.000000 | 146.000000 | 179.000000 | 363.500000 | 173.500000 | 71.500000 | 6.000000 | 11.000000 | 188.000000 | 197.000000 |
| 75% | 100.000000 | 49.000000 | 98.000000 | 195.000000 | 65.000000 | 10.000000 | 198.000000 | 46.000000 | 23.000000 | 159.000000 | 217.000000 | 587.000000 | 198.000000 | 75.000000 | 9.000000 | 19.000000 | 193.000000 | 201.000000 |
| max | 119.000000 | 59.000000 | 112.000000 | 333.000000 | 138.000000 | 55.000000 | 265.000000 | 61.000000 | 29.000000 | 188.000000 | 320.000000 | 1018.000000 | 268.000000 | 135.000000 | 22.000000 | 41.000000 | 206.000000 | 211.000000 |
x
sn.pairplot(raw_data, diag_kind = 'kde')plt.show()C:\ProgramData\Anaconda3\lib\site-packages\statsmodels\nonparametric\kde.py:447: RuntimeWarning: invalid value encountered in greater X = X[np.logical_and(X > clip[0], X < clip[1])] # won't work for two columns. C:\ProgramData\Anaconda3\lib\site-packages\statsmodels\nonparametric\kde.py:447: RuntimeWarning: invalid value encountered in less X = X[np.logical_and(X > clip[0], X < clip[1])] # won't work for two columns.
raw_data['class']= raw_data['class'].replace({'van': 1, 'bus': 2, 'car': 3})raw_data.describe()| compactness | circularity | distance_circularity | radius_ratio | pr.axis_aspect_ratio | max.length_aspect_ratio | scatter_ratio | elongatedness | pr.axis_rectangularity | max.length_rectangularity | scaled_variance | scaled_variance.1 | scaled_radius_of_gyration | scaled_radius_of_gyration.1 | skewness_about | skewness_about.1 | skewness_about.2 | hollows_ratio | class | |
|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|
| count | 846.000000 | 841.000000 | 842.000000 | 840.000000 | 844.000000 | 846.000000 | 845.000000 | 845.000000 | 843.000000 | 846.000000 | 843.000000 | 844.000000 | 844.000000 | 842.000000 | 840.000000 | 845.000000 | 845.000000 | 846.000000 | 846.000000 |
| mean | 93.678487 | 44.828775 | 82.110451 | 168.888095 | 61.678910 | 8.567376 | 168.901775 | 40.933728 | 20.582444 | 147.998818 | 188.631079 | 439.494076 | 174.709716 | 72.447743 | 6.364286 | 12.602367 | 188.919527 | 195.632388 | 2.271868 |
| std | 8.234474 | 6.152172 | 15.778292 | 33.520198 | 7.891463 | 4.601217 | 33.214848 | 7.816186 | 2.592933 | 14.515652 | 31.411004 | 176.666903 | 32.584808 | 7.486190 | 4.920649 | 8.936081 | 6.155809 | 7.438797 | 0.818044 |
| min | 73.000000 | 33.000000 | 40.000000 | 104.000000 | 47.000000 | 2.000000 | 112.000000 | 26.000000 | 17.000000 | 118.000000 | 130.000000 | 184.000000 | 109.000000 | 59.000000 | 0.000000 | 0.000000 | 176.000000 | 181.000000 | 1.000000 |
| 25% | 87.000000 | 40.000000 | 70.000000 | 141.000000 | 57.000000 | 7.000000 | 147.000000 | 33.000000 | 19.000000 | 137.000000 | 167.000000 | 318.000000 | 149.000000 | 67.000000 | 2.000000 | 5.000000 | 184.000000 | 190.250000 | 2.000000 |
| 50% | 93.000000 | 44.000000 | 80.000000 | 167.000000 | 61.000000 | 8.000000 | 157.000000 | 43.000000 | 20.000000 | 146.000000 | 179.000000 | 363.500000 | 173.500000 | 71.500000 | 6.000000 | 11.000000 | 188.000000 | 197.000000 | 3.000000 |
| 75% | 100.000000 | 49.000000 | 98.000000 | 195.000000 | 65.000000 | 10.000000 | 198.000000 | 46.000000 | 23.000000 | 159.000000 | 217.000000 | 587.000000 | 198.000000 | 75.000000 | 9.000000 | 19.000000 | 193.000000 | 201.000000 | 3.000000 |
| max | 119.000000 | 59.000000 | 112.000000 | 333.000000 | 138.000000 | 55.000000 | 265.000000 | 61.000000 | 29.000000 | 188.000000 | 320.000000 | 1018.000000 | 268.000000 | 135.000000 | 22.000000 | 41.000000 | 206.000000 | 211.000000 | 3.000000 |
data= raw_data.drop(['class'], axis= 1)data.median()compactness 93.0 circularity 44.0 distance_circularity 80.0 radius_ratio 167.0 pr.axis_aspect_ratio 61.0 max.length_aspect_ratio 8.0 scatter_ratio 157.0 elongatedness 43.0 pr.axis_rectangularity 20.0 max.length_rectangularity 146.0 scaled_variance 179.0 scaled_variance.1 363.5 scaled_radius_of_gyration 173.5 scaled_radius_of_gyration.1 71.5 skewness_about 6.0 skewness_about.1 11.0 skewness_about.2 188.0 hollows_ratio 197.0 dtype: float64
def _replace_upper_outliers_with_median(frame, column, upper_limit):
    """Overwrite values of ``frame[column]`` above ``upper_limit`` with the median.

    The median is computed once, before any value is replaced, and the write
    goes through ``frame.loc`` so it always lands in ``frame`` itself. This
    avoids the chained assignment on a column slice that triggers
    SettingWithCopyWarning, and the hard-coded row count of an index loop.

    Args:
        frame: DataFrame that is modified in place.
        column: Name of the column to clean.
        upper_limit: Values strictly greater than this are treated as outliers.

    Returns:
        The cleaned column of ``frame``.
    """
    values = frame[column]
    frame.loc[values > upper_limit, column] = values.median()
    return frame[column]


# Fill missing values with the median of their column.
data = data.fillna(data.median())

# For each skewed feature: draw the box plot first (so the outliers are still
# visible in it), then replace everything above the chosen limit.
max_length_aspect_ratio = data['max.length_aspect_ratio']
sn.boxplot(data=max_length_aspect_ratio)
max_length_aspect_ratio = _replace_upper_outliers_with_median(
    data, 'max.length_aspect_ratio', 13
)

pr_access_aspect_ratio = data['pr.axis_aspect_ratio']
sn.boxplot(data=pr_access_aspect_ratio)
pr_access_aspect_ratio = _replace_upper_outliers_with_median(
    data, 'pr.axis_aspect_ratio', 80
)

radius_ratio = data['radius_ratio']
sn.boxplot(data=radius_ratio)
radius_ratio = _replace_upper_outliers_with_median(data, 'radius_ratio', 250)
data.describe()| compactness | circularity | distance_circularity | radius_ratio | pr.axis_aspect_ratio | max.length_aspect_ratio | scatter_ratio | elongatedness | pr.axis_rectangularity | max.length_rectangularity | scaled_variance | scaled_variance.1 | scaled_radius_of_gyration | scaled_radius_of_gyration.1 | skewness_about | skewness_about.1 | skewness_about.2 | hollows_ratio | |
|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|
| count | 846.000000 | 846.000000 | 846.000000 | 846.000000 | 846.000000 | 846.000000 | 846.000000 | 846.000000 | 846.000000 | 846.000000 | 846.000000 | 846.000000 | 846.000000 | 846.000000 | 846.000000 | 846.000000 | 846.000000 | 846.000000 |
| mean | 93.678487 | 44.823877 | 82.100473 | 168.230496 | 61.154846 | 8.111111 | 168.887707 | 40.936170 | 20.580378 | 147.998818 | 188.596927 | 439.314421 | 174.706856 | 72.443262 | 6.361702 | 12.600473 | 188.918440 | 195.632388 |
| std | 8.234474 | 6.134272 | 15.741569 | 32.018672 | 5.613458 | 2.074801 | 33.197710 | 7.811882 | 2.588558 | 14.515652 | 31.360427 | 176.496341 | 32.546277 | 7.468734 | 4.903244 | 8.930962 | 6.152247 | 7.438797 |
| min | 73.000000 | 33.000000 | 40.000000 | 104.000000 | 47.000000 | 2.000000 | 112.000000 | 26.000000 | 17.000000 | 118.000000 | 130.000000 | 184.000000 | 109.000000 | 59.000000 | 0.000000 | 0.000000 | 176.000000 | 181.000000 |
| 25% | 87.000000 | 40.000000 | 70.000000 | 141.000000 | 57.000000 | 7.000000 | 147.000000 | 33.000000 | 19.000000 | 137.000000 | 167.000000 | 318.250000 | 149.000000 | 67.000000 | 2.000000 | 5.000000 | 184.000000 | 190.250000 |
| 50% | 93.000000 | 44.000000 | 80.000000 | 167.000000 | 61.000000 | 8.000000 | 157.000000 | 43.000000 | 20.000000 | 146.000000 | 179.000000 | 363.500000 | 173.500000 | 71.500000 | 6.000000 | 11.000000 | 188.000000 | 197.000000 |
| 75% | 100.000000 | 49.000000 | 98.000000 | 194.000000 | 65.000000 | 10.000000 | 198.000000 | 46.000000 | 23.000000 | 159.000000 | 217.000000 | 586.750000 | 198.000000 | 75.000000 | 9.000000 | 19.000000 | 193.000000 | 201.000000 |
| max | 119.000000 | 59.000000 | 112.000000 | 250.000000 | 76.000000 | 13.000000 | 265.000000 | 61.000000 | 29.000000 | 188.000000 | 320.000000 | 1018.000000 | 268.000000 | 135.000000 | 22.000000 | 41.000000 | 206.000000 | 211.000000 |
# Scatter-plot matrix of the cleaned features.
sn.pairplot(data, diag_kind='kde')
plt.show()

from sklearn.preprocessing import StandardScaler

# Standardize every feature; from here on `data` is a NumPy array, not a DataFrame.
# NOTE(review): the scaler is fitted on all rows before the train/test split,
# so test-set statistics leak into the preprocessing. Consider fitting it on
# the training rows only (e.g. inside a Pipeline).
data = StandardScaler().fit_transform(data)
from sklearn.model_selection import train_test_split

# Hold out 30% of the rows for testing; the fixed seed keeps the split reproducible.
x_train, x_test, y_train, y_test = train_test_split(
    data, raw_data['class'], test_size=0.3, random_state=0
)

from sklearn import svm

# gamma is set explicitly to the value the recorded run used ('auto'): the
# library default is changing to 'scale', and naming it keeps the results
# reproducible and silences the FutureWarning.
svma = svm.SVC(gamma='auto')
svma.fit(x_train, y_train)
svm_prediction = svma.predict(x_test)
x
from sklearn.metrics import classification_reportfrom sklearn import metricsprint(classification_report(y_test,svm_prediction))print("Accuracy:",metrics.accuracy_score(y_test, svm_prediction))metrics.confusion_matrix(y_test, svm_prediction) precision recall f1-score support
1 0.98 0.98 0.98 53
2 0.98 0.98 0.98 60
3 0.99 0.99 0.99 141
accuracy 0.99 254
macro avg 0.99 0.99 0.99 254
weighted avg 0.99 0.99 0.99 254
Accuracy: 0.9881889763779528
array([[ 52, 1, 0],
[ 0, 59, 1],
[ 1, 0, 140]], dtype=int64)x
from sklearn.model_selection import cross_val_scoreclf = svm.SVC(kernel='linear', C=1)scores = cross_val_score(clf, data, raw_data['class'], cv=5)scores array([0.94117647, 0.95294118, 0.94705882, 0.95857988, 0.95209581])
x
print("Accuracy: %0.2f (+/- %0.2f)" % (scores.mean(), scores.std() * 2))Accuracy: 0.95 (+/- 0.01)
# Principal Component Analysisfrom numpy import arrayfrom sklearn.decomposition import PCA# create the PCA instancepca = PCA(8)# fit on datapca.fit(data)print(pca.components_)print(pca.explained_variance_)# transform datapcadata = pca.transform(data)print(pcadata)[[ 0.27273518 0.28741899 0.30232752 0.27011948 0.09689464 0.19559503 0.31072282 -0.30890325 0.30755002 0.27825022 0.29676248 0.30688497 0.26356413 -0.04037111 0.04217391 0.05878818 0.03739285 0.08389378] [-0.09402491 0.13257726 -0.04946733 -0.19626984 -0.26136031 -0.11137445 0.0705384 -0.00837394 0.08256217 0.12324944 0.09259617 0.07752009 0.21356472 0.48006222 -0.04169996 -0.10051474 -0.50896972 -0.51312981] [ 0.07263159 0.19190467 -0.04621247 -0.1126621 -0.06594468 0.20306458 -0.10502353 0.1056912 -0.09627636 0.21855804 -0.16056044 -0.10705279 0.18415833 -0.1008689 0.62746259 -0.58199598 0.0454722 0.04921917] [ 0.12924861 -0.07540313 0.11541801 -0.24458828 -0.6237589 0.23324969 0.03143173 0.0453379 0.0567906 -0.00417757 -0.04156598 0.02793803 -0.11765801 -0.13062663 0.34796787 0.54110943 -0.05623672 0.06510857] [ 0.16250793 -0.14182777 -0.0888463 0.13330928 0.08845701 -0.63156104 0.07899073 -0.0645369 0.07642302 -0.25482915 0.16684589 0.1168687 -0.00504046 0.16370232 0.5606893 0.10269531 0.18777601 -0.10693811] [ 0.23689551 -0.06283367 -0.01550653 -0.15034991 -0.55492977 -0.28721853 0.1038946 -0.08474375 0.11127167 -0.06923381 0.09890243 0.1427139 -0.06509349 -0.20013661 -0.3520953 -0.47126749 0.26222663 0.04719469] [ 0.2591551 -0.37473332 0.13690977 0.17132755 0.04059489 0.4352907 0.0490811 -0.06593772 0.04684041 -0.2982151 0.1521157 0.02886298 -0.46703474 0.29490781 0.08974557 -0.29653525 -0.15724051 -0.07436998] [ 0.19325357 0.11440448 -0.15880511 -0.01157118 -0.12252233 0.05897638 -0.1494669 0.13822565 -0.15467945 0.24557204 0.19197281 -0.143106 0.01699308 0.66945992 -0.09759643 0.09640604 0.3792008 0.33411534]] [9.75662909 3.23134049 1.196735 1.15380498 0.87640578 0.6676913 0.31935505 0.27668731] [[ 
0.58070143 -0.68734308 0.26874387 ... -1.7545482 -0.25838013 -0.09008 ] [-1.50893659 -0.40335631 0.52678927 ... -0.11490211 0.26733187 0.18909573] [ 3.91077207 0.14145812 1.14877533 ... -0.67349972 0.68653145 -0.37997 ] ... [ 5.10311221 -0.17171947 0.31439493 ... 0.24336737 0.87232412 -0.13674276] [-3.30484055 -1.00999455 -1.76948991 ... 0.07063498 -0.40494919 -0.55842163] [-4.96003436 0.40647288 -1.15350817 ... 0.40879893 -0.43721497 -0.13532674]]
x_train, x_test, y_train, y_test= train_test_split(pcadata, raw_data['class'], test_size=0.3, random_state=0)svma= svm.SVC()svma.fit(x_train, y_train)svm_prediction= svma.predict(x_test)C:\ProgramData\Anaconda3\lib\site-packages\sklearn\svm\base.py:193: FutureWarning: The default value of gamma will change from 'auto' to 'scale' in version 0.22 to account better for unscaled features. Set gamma explicitly to 'auto' or 'scale' to avoid this warning. "avoid this warning.", FutureWarning)
print(classification_report(y_test,svm_prediction))print("Accuracy:",metrics.accuracy_score(y_test, svm_prediction))metrics.confusion_matrix(y_test, svm_prediction) precision recall f1-score support
1 0.92 0.92 0.92 53
2 0.97 0.98 0.98 60
3 0.98 0.97 0.98 141
accuracy 0.96 254
macro avg 0.96 0.96 0.96 254
weighted avg 0.96 0.96 0.96 254
Accuracy: 0.9645669291338582
array([[ 49, 2, 2],
[ 0, 59, 1],
[ 4, 0, 137]], dtype=int64)from sklearn.model_selection import cross_val_scoreclf = svm.SVC(kernel='linear', C=1)scores = cross_val_score(clf, data, raw_data['class'], cv=5)scores array([0.94117647, 0.95294118, 0.94705882, 0.95857988, 0.95209581])
print("Accuracy: %0.2f (+/- %0.2f)" % (scores.mean(), scores.std() * 2))Accuracy: 0.95 (+/- 0.01)
#To increase the accuracy, I have replaced outliers with medians and also standardized before applying the algorithm#In this case, applying PCA is reducing the accuracy. And in this case, PCA is not required as there is relatively less number of columns present#k fold validation for both the cases is same#Without PCA:#Accuracy: 98.8 kfold accuracy: 95#With PCA:#Accuracy: 96.4 kfold accuracy: 95